library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(labelled)
## Warning: package 'labelled' was built under R version 4.1.3
# Build the example survey responses, one vector per column.
# Every vector lists the participants in the same order. -99 is the raw code
# that is later labelled "missing response"; NA is a genuinely absent value.

survey <- tibble::tibble(
  participant_id  = c("10", "22", "13", "11", "15", "20",
                      "14", "17", "25", "27", "28", "18"),
  pet_1           = c(2, 2, 4, 2, 2, 4, 2, 2, 4, 2, 2, 4),
  pet_2           = c(1, 2, 4, 1, -99, 4, 1, 3, 4, 1, 3, 4),
  pet_3           = c(1, -99, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1),
  pet_type        = c(1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1),
  participant_age = c(12, 14, 15, 13, 16, 17, 13, 16, 17, 13, NA, 11)
)


# Build the data dictionary: one row per survey variable, pairing the column
# name with its human-readable label. The row order here is the column order
# the survey is later rearranged into.

dict <- tibble::tibble(
  var_name = c(
    "participant_id",
    "participant_age",
    "pet_type",
    "pet_1",
    "pet_2",
    "pet_3"
  ),
  label = c(
    "Respondent study ID",
    "How old are you?",
    "What type of pet do you have",
    "Within your family, your pet likes you best",
    "You talk to your pet as a friend",
    "You buy presents for your pet"
  )
)

# Add value labels
#
# pet_1..pet_3 share one response scale. -99 is labelled "missing response"
# and is also declared a user-defined missing value, so each item is built
# directly as a `labelled_spss` vector (labels + na_values in one step) rather
# than creating a plain labelled vector and patching `na_values` on afterwards.
# pet_type gets its own dog/cat value labels.

survey <- survey %>%
  mutate(across(
    pet_1:pet_3,
    ~ labelled_spss(
      .x,
      labels = c(
        `almost never` = 1,
        sometimes = 2,
        often = 3,
        `almost always` = 4,
        `missing response` = -99
      ),
      na_values = -99
    )
  )) %>%
  set_value_labels(pet_type = c(dog = 1, cat = 2))


# Attach variable labels from the data dictionary
#
# `dict_labels` is a named list: names are the `var_name` entries, values are
# the matching `label` strings. It is handed to set_variable_labels() in one
# go via `.labels`, with `.strict = FALSE` as in the labelled documentation
# for tolerating names that have no matching column.

dict_labels <- as.list(deframe(dict[c("var_name", "label")]))

survey <- set_variable_labels(survey, .labels = dict_labels, .strict = FALSE)

# Reorder the survey columns so they follow the row order of the dictionary

var_order <- dplyr::pull(dict, var_name)

survey <- relocate(survey, all_of(var_order))

# The codebook steps that follow operate on this object
codebook_data <- survey

Here, we’re just setting a few options.

# Chunk options applied while the codebook is generated
knitr::opts_chunk$set(
  # show warnings during codebook generation
  warning = TRUE,
  # show messages during codebook generation
  message = TRUE,
  # do not interrupt codebook generation in case of errors
  # (usually better for debugging)
  error = TRUE,
  # show R code
  echo = TRUE
)
# Use ggplot2's black-and-white theme as the default for all plots
ggplot2::theme_set(ggplot2::theme_bw())

Now, we’re preparing our data for the codebook.

library(codebook)
## Warning: package 'codebook' was built under R version 4.1.3
## 
## Attaching package: 'codebook'
## The following object is masked from 'package:labelled':
## 
##     to_factor
# to import an SPSS file from the same folder uncomment and edit the line below
# codebook_data <- rio::import("mydata.sav")
# for Stata
# codebook_data <- rio::import("mydata.dta")
# for CSV
# codebook_data <- rio::import("mydata.csv")

# Skip this step if your missing values are already properly labelled.
codebook_data <- detect_missing(
  codebook_data,
  # only labelled values are autodetected as missing
  only_labelled = TRUE,
  # negative values are missing values
  negative_values_are_missing = TRUE,
  # 99/999 are missing values, if they are more than 5 MAD from the median
  ninety_nine_problems = TRUE
)

# Outside of formr, the codebook package has to guess which items form a
# scale. detect_scales() looks for item aggregates named like this:
#   scale = scale_1 + scale_2R + scale_3R
# Identifying these aggregates lets the codebook function compute
# reliabilities automatically. Items are NOT reversed automatically.
codebook_data <- codebook::detect_scales(codebook_data)
## Warning in detect_scales(codebook_data): pet items found, but no aggregate

Create codebook

# Render the codebook report for the prepared data
codebook::codebook(codebook_data)

Metadata

Description

Dataset name: codebook_data

The dataset has N=12 rows and 6 columns. 9 rows have no missing values on any column.

Metadata for search engines
  • Date published: 2022-09-09
Keywords
participant_id
participant_age
pet_type
pet_1
pet_2
pet_3

Variables

participant_id

Respondent study ID

Distribution

Distribution of values for participant_id

Distribution of values for participant_id

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
participant_id Respondent study ID character 0 1 12 0 2 2 0

participant_age

How old are you?

Distribution

Distribution of values for participant_age

Distribution of values for participant_age

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
participant_age How old are you? numeric 1 0.9166667 11 14 17 14.27273 2.053821 ▃▆▂▂▇

pet_type

What type of pet do you have

Distribution

Distribution of values for pet_type

Distribution of values for pet_type

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd n_value_labels hist
pet_type What type of pet do you have haven_labelled 0 1 1 1 2 1.333333 0.492366 2 ▇▁▁▁▁▁▁▃

Value labels

Response choices
name value
dog 1
cat 2

pet_1

Within your family, your pet likes you best

Distribution

Distribution of values for pet_1

Distribution of values for pet_1

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd n_value_labels hist na_values
pet_1 Within your family, your pet likes you best haven_labelled 0 1 2 2 4 2.666667 0.9847319 5 ▇▁▁▁▁▁▁▃ -99

Value labels

Response choices
name value
almost never 1
sometimes 2
often 3
almost always 4
missing response -99

pet_2

You talk to your pet as a friend

Distribution

Distribution of values for pet_2

Distribution of values for pet_2

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd n_value_labels hist na_values
pet_2 You talk to your pet as a friend haven_labelled 1 0.9166667 1 3 4 2.545454 1.368476 5 ▇▁▂▁▁▃▁▇ -99

Value labels

Response choices
name value
almost never 1
sometimes 2
often 3
almost always 4
[-99] missing response NA

pet_3

You buy presents for your pet

Distribution

Distribution of values for pet_3

Distribution of values for pet_3

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd n_value_labels hist na_values
pet_3 You buy presents for your pet haven_labelled 1 0.9166667 1 1 3 1.545454 0.9341987 5 ▇▁▁▁▁▁▁▃ -99

Value labels

Response choices
name value
almost never 1
sometimes 2
often 3
almost always 4
[-99] missing response NA

Missingness report

Codebook table

JSON-LD metadata

The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.

{
  "name": "codebook_data",
  "datePublished": "2022-09-09",
  "description": "The dataset has N=12 rows and 6 columns.\n9 rows have no missing values on any column.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n|name            |label                                       | n_missing|\n|:---------------|:-------------------------------------------|---------:|\n|participant_id  |Respondent study ID                         |         0|\n|participant_age |How old are you?                            |         1|\n|pet_type        |What type of pet do you have                |         0|\n|pet_1           |Within your family, your pet likes you best |         0|\n|pet_2           |You talk to your pet as a friend            |         1|\n|pet_3           |You buy presents for your pet               |         1|\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.2).",
  "keywords": ["participant_id", "participant_age", "pet_type", "pet_1", "pet_2", "pet_3"],
  "@context": "http://schema.org/",
  "@type": "Dataset",
  "variableMeasured": [
    {
      "name": "participant_id",
      "description": "Respondent study ID",
      "@type": "propertyValue"
    },
    {
      "name": "participant_age",
      "description": "How old are you?",
      "@type": "propertyValue"
    },
    {
      "name": "pet_type",
      "description": "What type of pet do you have",
      "value": "1. dog,\n2. cat",
      "maxValue": 2,
      "minValue": 1,
      "@type": "propertyValue"
    },
    {
      "name": "pet_1",
      "description": "Within your family, your pet likes you best",
      "value": "1. almost never,\n2. sometimes,\n3. often,\n4. almost always,\n-99. missing response",
      "maxValue": 4,
      "minValue": -99,
      "@type": "propertyValue"
    },
    {
      "name": "pet_2",
      "description": "You talk to your pet as a friend",
      "value": "1. almost never,\n2. sometimes,\n3. often,\n4. almost always,\nNA. [-99] missing response",
      "maxValue": 4,
      "minValue": 1,
      "@type": "propertyValue"
    },
    {
      "name": "pet_3",
      "description": "You buy presents for your pet",
      "value": "1. almost never,\n2. sometimes,\n3. often,\n4. almost always,\nNA. [-99] missing response",
      "maxValue": 4,
      "minValue": 1,
      "@type": "propertyValue"
    }
  ]
}